Wait queues, allowing conditional sleep in hypervisor context.
author Keir Fraser <keir@xen.org>
Wed, 17 Nov 2010 16:42:37 +0000 (16:42 +0000)
committer Keir Fraser <keir@xen.org>
Wed, 17 Nov 2010 16:42:37 +0000 (16:42 +0000)
Signed-off-by: Keir Fraser <keir@xen.org>
xen/arch/x86/domain.c
xen/arch/x86/hvm/hvm.c
xen/common/Makefile
xen/common/domain.c
xen/common/schedule.c
xen/common/wait.c [new file with mode: 0644]
xen/include/xen/sched.h
xen/include/xen/wait.h [new file with mode: 0644]

index 13ff47555590042f487849f8e20a4bb11e880e94..e9ed3e1643a97b376cb8aad1c41fec98b4542cdc 100644 (file)
@@ -33,6 +33,7 @@
 #include <xen/pci.h>
 #include <xen/paging.h>
 #include <xen/cpu.h>
+#include <xen/wait.h>
 #include <public/sysctl.h>
 #include <asm/regs.h>
 #include <asm/mc146818rtc.h>
@@ -77,6 +78,7 @@ static void continue_idle_domain(struct vcpu *v)
 
 static void continue_nonidle_domain(struct vcpu *v)
 {
+    /* Resume a hypervisor context saved by a waitqueue sleep, if any. */
+    check_wakeup_from_wait();
     reset_stack_and_jump(ret_from_intr);
 }
 
index 6bd92ce61c90e9d8dc5588d9c5105727aa9885b6..ccf0d88b9e753cd076268869396444a954df3d3b 100644 (file)
@@ -34,6 +34,7 @@
 #include <xen/event.h>
 #include <xen/paging.h>
 #include <xen/cpu.h>
+#include <xen/wait.h>
 #include <asm/shadow.h>
 #include <asm/hap.h>
 #include <asm/current.h>
@@ -283,6 +284,8 @@ void hvm_do_resume(struct vcpu *v)
 
     pt_restore_timer(v);
 
+    check_wakeup_from_wait();
+
     /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
     p = get_ioreq(v);
     while ( p->state != STATE_IOREQ_NONE )
index efdfddd545df62e3f87c6183b9ec1a1001a7bd36..1abca7d64a9f290b8e7b8b111525d1fa889d35df 100644 (file)
@@ -31,6 +31,7 @@ obj-y += timer.o
 obj-y += trace.o
 obj-y += version.o
 obj-y += vsprintf.o
+obj-y += wait.o
 obj-y += xmalloc_tlsf.o
 obj-y += rcupdate.o
 obj-y += tmem.o
index 69a0ab4b621c221e10feefd34942da9c3016fae7..1a08636756c265399563d5de78ecc5fc0ea71029 100644 (file)
@@ -27,6 +27,7 @@
 #include <xen/percpu.h>
 #include <xen/multicall.h>
 #include <xen/rcupdate.h>
+#include <xen/wait.h>
 #include <acpi/cpufreq/cpufreq.h>
 #include <asm/debugger.h>
 #include <public/sched.h>
@@ -162,10 +163,12 @@ struct vcpu *alloc_vcpu(
         v->vcpu_info = ((vcpu_id < XEN_LEGACY_MAX_VCPUS)
                         ? (vcpu_info_t *)&shared_info(d, vcpu_info[vcpu_id])
                         : &dummy_vcpu_info);
+        init_waitqueue_vcpu(v);
     }
 
     if ( sched_init_vcpu(v, cpu_id) != 0 )
     {
+        destroy_waitqueue_vcpu(v);
         free_vcpu_struct(v);
         return NULL;
     }
@@ -173,6 +176,7 @@ struct vcpu *alloc_vcpu(
     if ( vcpu_initialise(v) != 0 )
     {
         sched_destroy_vcpu(v);
+        destroy_waitqueue_vcpu(v);
         free_vcpu_struct(v);
         return NULL;
     }
@@ -617,6 +621,7 @@ static void complete_domain_destroy(struct rcu_head *head)
         tasklet_kill(&v->continue_hypercall_tasklet);
         vcpu_destroy(v);
         sched_destroy_vcpu(v);
+        destroy_waitqueue_vcpu(v);
     }
 
     grant_table_destroy(d);
index 7814b178ce614d711097d5fe40616396f81f2dee..5e86082b15e8a81f207d036fc2a55aa5f8f65dea 100644 (file)
@@ -1396,6 +1396,11 @@ void sched_tick_resume(void)
     SCHED_OP(sched, tick_resume, cpu);
 }
 
+/*
+ * Enter the scheduler. wait_event() calls this after prepare_to_wait() has
+ * queued and paused the current vcpu.
+ */
+void wait(void)
+{
+    schedule();
+}
+
 #ifdef CONFIG_COMPAT
 #include "compat/schedule.c"
 #endif
diff --git a/xen/common/wait.c b/xen/common/wait.c
new file mode 100644 (file)
index 0000000..cb63f85
--- /dev/null
@@ -0,0 +1,165 @@
+/******************************************************************************
+ * wait.c
+ * 
+ * Sleep in hypervisor context for some event to occur.
+ */
+
+#include <xen/config.h>
+#include <xen/sched.h>
+#include <xen/wait.h>
+
+/* Per-vcpu waitqueue state, allocated by init_waitqueue_vcpu(). */
+struct waitqueue_vcpu {
+    /* Link on a waitqueue_head's list; empty while the vcpu is not queued. */
+    struct list_head list;
+    /* The vcpu this state belongs to; wake_up() unpauses it. */
+    struct vcpu *vcpu;
+#ifdef CONFIG_X86
+    /*
+     * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
+     * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
+     */
+    /* Stack pointer recorded by __prepare_to_wait(); NULL if nothing saved. */
+    void *esp;
+    /* Copy of the hypervisor stack contents taken by __prepare_to_wait(). */
+    char stack[1500];
+#endif
+};
+
+/*
+ * Allocate and attach the per-vcpu waitqueue state for @v.
+ *
+ * The new state is zero-filled, its list link is initialised as empty and it
+ * is bound to @v through v->waitqueue_vcpu.
+ *
+ * Returns 0 on success, or -ENOMEM if the allocation fails (in which case
+ * v->waitqueue_vcpu is left untouched).
+ */
+int init_waitqueue_vcpu(struct vcpu *v)
+{
+    struct waitqueue_vcpu *entry = xmalloc(struct waitqueue_vcpu);
+
+    if ( !entry )
+        return -ENOMEM;
+
+    memset(entry, 0, sizeof(struct waitqueue_vcpu));
+    entry->vcpu = v;
+    INIT_LIST_HEAD(&entry->list);
+
+    v->waitqueue_vcpu = entry;
+    return 0;
+}
+
+/*
+ * Release the per-vcpu waitqueue state of @v, if any, and clear the pointer.
+ *
+ * It is a bug for the vcpu to still be linked on a waitqueue at this point.
+ */
+void destroy_waitqueue_vcpu(struct vcpu *v)
+{
+    struct waitqueue_vcpu *entry = v->waitqueue_vcpu;
+
+    if ( entry != NULL )
+    {
+        BUG_ON(!list_empty(&entry->list));
+        xfree(entry);
+        v->waitqueue_vcpu = NULL;
+    }
+}
+
+/* Dynamically initialise @wq: an empty waiter list and a fresh lock. */
+void init_waitqueue_head(struct waitqueue_head *wq)
+{
+    INIT_LIST_HEAD(&wq->list);
+    spin_lock_init(&wq->lock);
+}
+
+/*
+ * Wake every vcpu queued on @wq.
+ *
+ * With wq->lock held, waiters are taken off the front of the list one at a
+ * time, unlinked, and unpaused, until the list is empty.
+ */
+void wake_up(struct waitqueue_head *wq)
+{
+    struct list_head *head = &wq->list;
+
+    spin_lock(&wq->lock);
+
+    while ( !list_empty(head) )
+    {
+        struct waitqueue_vcpu *waiter =
+            list_entry(head->next, struct waitqueue_vcpu, list);
+
+        list_del_init(&waiter->list);
+        vcpu_unpause(waiter->vcpu);
+    }
+
+    spin_unlock(&wq->lock);
+}
+
+#ifdef CONFIG_X86
+
+/*
+ * Snapshot the current hypervisor stack into wqv->stack.
+ *
+ * The asm pushes the listed registers and then, via "call 1f", the address of
+ * label 1. It copies everything from the resulting stack pointer up to
+ * cpu_info into wqv->stack with "rep movsb", hands that stack pointer back
+ * through %esi/%rsi (stored in wqv->esp), discards the label address and pops
+ * the registers again.
+ *
+ * check_wakeup_from_wait() later copies the snapshot back and jumps to the
+ * saved label address, so a resumed vcpu continues execution at label 1
+ * inside this asm block.
+ */
+static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
+{
+    /* Upper bound of the stack region to save. */
+    char *cpu_info = (char *)get_cpu_info();
+    asm volatile (
+#ifdef CONFIG_X86_64
+        "push %%rax; push %%rbx; push %%rcx; push %%rdx; push %%rdi; "
+        "push %%rbp; push %%r8; push %%r9; push %%r10; push %%r11; "
+        "push %%r12; push %%r13; push %%r14; push %%r15; call 1f; "
+        "1: mov 80(%%rsp),%%rdi; mov 96(%%rsp),%%rcx; mov %%rsp,%%rsi; "
+        "sub %%rsi,%%rcx; rep movsb; mov %%rsp,%%rsi; pop %%rax; "
+        "pop %%r15; pop %%r14; pop %%r13; pop %%r12; "
+        "pop %%r11; pop %%r10; pop %%r9; pop %%r8; "
+        "pop %%rbp; pop %%rdi; pop %%rdx; pop %%rcx; pop %%rbx; pop %%rax"
+#else
+        "push %%eax; push %%ebx; push %%ecx; push %%edx; push %%edi; "
+        "push %%ebp; call 1f; "
+        "1: mov 8(%%esp),%%edi; mov 16(%%esp),%%ecx; mov %%esp,%%esi; "
+        "sub %%esi,%%ecx; rep movsb; mov %%esp,%%esi; pop %%eax; "
+        "pop %%ebp; pop %%edi; pop %%edx; pop %%ecx; pop %%ebx; pop %%eax"
+#endif
+        : "=S" (wqv->esp)
+        : "c" (cpu_info), "D" (wqv->stack)
+        : "memory" );
+    /*
+     * NOTE(review): this size check runs after the copy above has already
+     * been performed, so an oversized stack would be written past the end of
+     * wqv->stack before it is detected -- consider validating the length
+     * before copying.
+     */
+    BUG_ON((cpu_info - (char *)wqv->esp) > sizeof(wqv->stack));
+}
+
+/*
+ * Discard any saved hypervisor context: with wqv->esp cleared,
+ * check_wakeup_from_wait() takes its early-return path.
+ */
+static void __finish_wait(struct waitqueue_vcpu *wqv)
+{
+    wqv->esp = NULL;
+}
+
+/*
+ * Resume a hypervisor context previously saved by __prepare_to_wait().
+ *
+ * Returns immediately when the current vcpu has no saved context
+ * (wqv->esp == NULL). Otherwise control does not return to the caller: the
+ * saved stack is restored and execution jumps to the address stored at its
+ * top.
+ *
+ * NOTE(review): wqv is dereferenced unconditionally, while alloc_vcpu() as
+ * shown in this patch does not check init_waitqueue_vcpu()'s return value --
+ * confirm that current->waitqueue_vcpu cannot be NULL here.
+ */
+void check_wakeup_from_wait(void)
+{
+    struct waitqueue_vcpu *wqv = current->waitqueue_vcpu;
+
+    ASSERT(list_empty(&wqv->list));
+
+    if ( likely(wqv->esp == NULL) )
+        return;
+
+    /*
+     * Point the stack pointer at the saved location, copy the saved bytes
+     * (cpu_info minus the saved stack pointer) from wqv->stack back onto the
+     * stack, then jump to the address held at the top of the restored stack.
+     */
+    asm volatile (
+        "mov %1,%%"__OP"sp; rep movsb; jmp *(%%"__OP"sp)"
+        : : "S" (wqv->stack), "D" (wqv->esp),
+        "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+        : "memory" );
+}
+
+#else /* !CONFIG_X86 */
+
+/* Without CONFIG_X86 the stack save/discard hooks expand to no-ops. */
+#define __prepare_to_wait(wqv) ((void)0)
+#define __finish_wait(wqv) ((void)0)
+
+#endif
+
+/*
+ * Queue the current vcpu on @wq ahead of a sleep.
+ *
+ * Under wq->lock the vcpu's waitqueue state is appended to the waiter list
+ * and the vcpu is paused via vcpu_pause_nosync(). After the lock is dropped
+ * the hypervisor context is saved with __prepare_to_wait() (a no-op without
+ * CONFIG_X86).
+ *
+ * The vcpu must not already be linked on a waitqueue.
+ */
+void prepare_to_wait(struct waitqueue_head *wq)
+{
+    struct vcpu *curr = current;
+    struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
+
+    ASSERT(list_empty(&wqv->list));
+
+    spin_lock(&wq->lock);
+    list_add_tail(&wqv->list, &wq->list);
+    vcpu_pause_nosync(curr);
+    spin_unlock(&wq->lock);
+
+    /* Must follow the queueing above: a resumed vcpu continues from here. */
+    __prepare_to_wait(wqv);
+}
+
+/*
+ * Undo prepare_to_wait() for the current vcpu.
+ *
+ * Any saved hypervisor context is discarded first. If the vcpu is still
+ * linked on @wq (i.e. wake_up() has not already removed it), it is unlinked
+ * and unpaused under wq->lock.
+ */
+void finish_wait(struct waitqueue_head *wq)
+{
+    struct vcpu *self = current;
+    struct waitqueue_vcpu *waiter = self->waitqueue_vcpu;
+
+    __finish_wait(waiter);
+
+    /* Unlocked peek: nothing to do if we were already dequeued. */
+    if ( !list_empty(&waiter->list) )
+    {
+        spin_lock(&wq->lock);
+        /* Re-check under the lock in case wake_up() got there first. */
+        if ( !list_empty(&waiter->list) )
+        {
+            list_del_init(&waiter->list);
+            vcpu_unpause(self);
+        }
+        spin_unlock(&wq->lock);
+    }
+}
index 955ec997112f912b9e39b0f51fe7baa32d1f2abf..e817e3128b231db9564d9b3b36f8bcfa450562d8 100644 (file)
@@ -80,6 +80,8 @@ int  evtchn_init(struct domain *d); /* from domain_create */
 void evtchn_destroy(struct domain *d); /* from domain_kill */
 void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */
 
+struct waitqueue_vcpu;
+
 struct vcpu 
 {
     int              vcpu_id;
@@ -173,6 +175,8 @@ struct vcpu
     /* Multicall information. */
     struct mc_state  mc_state;
 
+    struct waitqueue_vcpu *waitqueue_vcpu;
+
     struct arch_vcpu arch;
 };
 
diff --git a/xen/include/xen/wait.h b/xen/include/xen/wait.h
new file mode 100644 (file)
index 0000000..c1793fd
--- /dev/null
@@ -0,0 +1,54 @@
+/******************************************************************************
+ * wait.h
+ * 
+ * Sleep in hypervisor context for some event to occur.
+ */
+
+#ifndef __XEN_WAIT_H__
+#define __XEN_WAIT_H__
+
+#include <xen/types.h>
+#include <xen/list.h>
+#include <xen/spinlock.h>
+
+/* A waitqueue: a list of waiting vcpus protected by a spinlock. */
+struct waitqueue_head {
+    /* Held while the waiter list is modified. */
+    spinlock_t lock;
+    /* Waiting vcpus, linked through their per-vcpu waitqueue state. */
+    struct list_head list;
+};
+
+/*
+ * Statically define and initialise a waitqueue called @name, with an
+ * unlocked lock and an empty waiter list.
+ */
+#define DEFINE_WAITQUEUE_HEAD(name)             \
+    struct waitqueue_head name = {              \
+        .lock = SPIN_LOCK_UNLOCKED,             \
+        .list = LIST_HEAD_INIT((name).list)     \
+    }
+
+/* Dynamically initialise a waitqueue. */
+void init_waitqueue_head(struct waitqueue_head *wq);
+
+/* Wake all VCPUs waiting on specified waitqueue. */
+void wake_up(struct waitqueue_head *wq);
+
+/* Wait on specified waitqueue until @condition is true. */
+#define wait_event(wq, condition)               \
+do {                                            \
+    if ( condition )                            \
+        break;                                  \
+    for ( ; ; ) {                               \
+        prepare_to_wait(&wq);                   \
+        if ( condition )                        \
+            break;                              \
+        wait();                                 \
+    }                                           \
+    finish_wait(&wq);                           \
+} while (0)
+
+/* Private functions. */
+int init_waitqueue_vcpu(struct vcpu *v);
+void destroy_waitqueue_vcpu(struct vcpu *v);
+void prepare_to_wait(struct waitqueue_head *wq);
+void wait(void);
+void finish_wait(struct waitqueue_head *wq);
+void check_wakeup_from_wait(void);
+
+#endif /* __XEN_WAIT_H__ */